InfoMagic Internet Tools 1993 July

home *** CD-ROM | disk | FTP | other *** search

/ InfoMagic Internet Tools 1993 July / Internet Tools.iso / RockRidge / info-service / www / dev / mail2html / mail2HTML.c < prev next >

Wrap

C/C++ Source or Header | 1993-01-31 | 31.7 KB | 1,037 lines

/************************************************************************ * * * Program: mail2HTML.c * * * *----------------------------------------------------------------------* * Description: Convert Mail/News Files to HTML (Prototype) * * * *----------------------------------------------------------------------* * Copyright (C) 1993 Basis Systeme netzwerk (BSn) * * Franz-Wolter Strasse 42 * * D-8000 Munich 81 * * Federal Republic of Germany * * * * Redistribution and use in source and binary forms are permitted * * provided that the above copyright notice and this paragraph are * * duplicated in all such forms and that any documentation, * * advertising materials, and other materials related to such * * distribution and use acknowledge that the software was developed * * by Basis Systeme netzwerk/Munich. * * * * This is distributed in the hope that it will be useful, but * * WITHOUT ANY WARRANTY; without even the implied warranty of * * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * * ************************************************************************/ #ifndef lint static char RCS_id[] = "$Header: /usr/export/home/edz/WWW/experimental/RCS/mail2html.c,v 0.02 1993/01/21 20:36:49 edz Exp edz $"; #endif /************* * $Log: mail2html.c,v $ * Revision 0.3 1993/01/21 20:36:49 edz * Prepare for distribution * * Revision 0.2 1993/01/21 08:10:49 edz * Removed Redundant address from Table of Contents structure * Changed the name of a few functions * * Revision 0.1 1993/01/10 20:25:49 edz * Initial revision * *************/ #define _MAIN_C /************************************************************************ * WARNING: * * DON'T complain that this looks like a "one-off" hack! Why? * * 'cause it is. * * I don't seem to have any mail around here that breaks this * * program and it seems to work (and I hope follows RFC822) but * * I am sure that SOMEONE must have some mail that this very simple * * parser breaks (Greetings from Murphy). * * * * NOTES: * * You need an ANSI-C compiler and libraries to compile this. * * * * (Q) Why did I not use Perl? * * (A) I have not figured out why EVERYONE else in using Perl * * for these kinds of programs. * * (Q) Why did I write this? * * (A) Mail like Usenet News is a very common information source * * that is already Hypertext (The file system, man pages and * * your Window systems are also Hypertext). * * Before you even have a chance to ask. Hypertext is a MODEL * * of Representation not an implementation. The World Wide Web, * * SOFABED, HyTime, Xerox Notes, HyperCard, HyTelnet ... are * * all implementations of a model for hypertext. The programs * * www, viola, hytelnet, notes, ... are all user interfaces to * * the respective implementations. * * * * The often cited reasons for the success of the Gopher model * * in contrast to the W3 model: that W3 requires hypertext * * documents and that hypertext documents are rare is faulty. * * A html editor would be welcome but is not as important as * * the community belief has stipulated. * * W3 represents a paradigm change. * * * * With Man pages, Mail, News, native HTML, GNU Info, * * Hytelnet/MaxThink, Internet Resource files (FAQs), * * WAIS, Gopher (the current gateway is incomplete), * * Archie, Directory Assistance (DIXIE/X500), CSO phone * * books * * ... there is more than enough! * * * * With some simple and transparents auto-tagers the whole * * Internet, Usenet and ... is the web. We hope to have a man * * page autotagger done in the near future (the current crop * * --- at least the ones I know--- are NOT autotaggers). * * * * This program is only a technology test. We hope to have a * * a mail transport done is the very near future. * * We are currently experimenting with several user authenitication * * schemes (a'la POP). * * * * If you use this TEST-code we would like to hear from you. * * * * TODO: * * (i) Add Support for MIME (RFC1341) * * (ii) Add Support for Configuration (RRC1343) * * (ii) Add Support for Multinational headers (RFC1342) * * (iii) Add X.400 Support * * (iv) Use a more intellegent message ID algorithm. The current * * implementation requires that the folder that contains the * * reference was at one time accessed, viz. that a delivery * * or cron process ran a bogus parse. Given the other info * * available (eg. time) one could narrow the search. * * (v) Use ndbm/gdbm instead of stupid ASCII file list/Remove * * ID Duplication. * * (vi) Fold into WWW Daemon (see Gopher), eg. * * WWW/Mail/mbox returns the table of Contents * * references have names like "/MessageID/AA04187@BSNGATE" * * (vii) Fold in state system transport (currently its stateless) * ************************************************************************/ /*- * /MessageID/<MAIL-ID> returns the .html file corresponding to <MAIL-ID> * */ #include <stdlib.h> #include <stdio.h> #include <string.h> #include <sys/stat.h> #include "Entities.h" #include "Ctypes.h" #include "Ctypes.c" #ifndef MAXPATHLEN # ifdef _MAX_PATH # define MAXPATHLEN _MAX_PATH # else # define MAXPATHLEN 1024 # endif #endif /*----------------------- User Configuration ----------------------------- */ /* The file below must be readable and writeable */ #define MESSAGE_INDEX_FILE "/var/adm/MESSAGES.INDEX" #define CONTENTS_EXTENSION ".MAIL_BODIES" #define DIRECTORY_EXTENSION ".TABLE_OF_CONTENTS" /*---------------------- End User Configuration -------------------------- */ #ifndef TRUE # define TRUE 1 # define FALSE 0 #endif /* Exit Codes */ #define E_ENOENT 02, "Sorry Document is not available or access is restricted" #define E_USAGE 64, "Incorrect Document request" #define E_NOINPUT 66, "Cannot open input" #define E_IOERR 74 "input/output error" #define E_SOFTWARE 70, "internal software error" #define E_NOTEMP 75, "INTERNAL ERROR: Can't create a file!" static const char IndexFile[] = MESSAGE_INDEX_FILE; static const char body_ext[] = CONTENTS_EXTENSION; static const char HEX[] = "0123456789ABCDEF"; #ifdef _MSC_VER /* The Microsoft compiler (Xenix/OS2/NT) */ # define strncasecomp strnicmp #else /* Case INDEPENDENT version of strncmp() */ static int strncasecomp(const char *str1, const char *str2, size_t n) { const char *p = str1; const char *q = str2; int diff; for (p = str1, q = str2;; p++, q++) { if (p == str1 + n) return 0; if (*p == '\0' || *q == '\0') return *p - *q; if ((diff = tolower(*p) - tolower(*q)) != 0) return diff; } /* NOTREACHED */ } #endif #ifdef NEED_STRDUP static char * strdup(const char *str) { char *tcp; if ((tcp = (char *) malloc(strlen(str) + 1)) != NULL) strcpy(tcp, str); return tcp; } #endif /* Structure to Build a table of contents */ typedef struct _contents { char *anchor; /* Reference (HREF="<anchor>") */ char *subject;/* Name of anchor */ char *author; /* Full Name of Author */ int isNews; /* 1 ==> News else Mail */ char *group; /* Newgroup (NULL if Mail) */ long start; /* start of message (offset) */ struct _contents *next; /* Next element in linked list */ } contents_t; /* Add an anchor to the table of contents */ void AddMessage(contents_t ** Contents, long start, int isNews, const char *anchor, const char *subject, const char *author, const char *group) { contents_t *tp; /* Build a table of contents in reverse order */ if ((tp = (contents_t *) malloc(sizeof(contents_t))) != NULL) { tp -> start = start; tp -> isNews = isNews; tp -> anchor = strdup(anchor); tp -> subject = strdup(subject); tp -> author = strdup(author); tp -> group = (isNews ? strdup(group) : NULL); tp -> next = *Contents; *Contents = tp; } } /* * This is a quick hack to speed up searching the MESSAGES.INDEX file * for the correct entry */ char HASH(const unsigned char *name) { unsigned short hash = 0; int i; for (i = 0; name[i]; i++) hash += (((short)TOISO(name[i])) << (1 + (i % 8))) + name[i]; return (char) ((hash % 225) + 30); } /* Encode reference as HTML compliant */ static char * EncodeAnchor(char *buf, const unsigned char *anchor, int case_sensitive) { char *tp1 = buf; const char *tp2; unsigned char ch; /* Note RFC822 specifies 7-bit headers (Message IDs are 6 bit) */ /* Replace non acceptable chars (# and %) and make uppercase */ for (tp2 = anchor; (ch = *tp2) != '\0'; tp2++) if (!ISPATH(ch) || ch == '#' || ch == '%') { *tp1++ = '%'; *tp1++ = HEX[(TOISO(ch) & '\377') >> 4]; *tp1++ = HEX[(TOISO(ch) & '\377') % 16]; } else *tp1++ = (case_sensitive ? (char)ch : (char)TOUPPER(ch)); *tp1 = '\0'; return (buf); } /* Append dictionary to parsed message id list */ void DumpDictionary(contents_t * Contents, const char *filename, long end) { if (Contents) { contents_t *tp; FILE *fp; char path[MAXPATHLEN + 256]; if ((fp = fopen(IndexFile, "a")) == NULL) return; EncodeAnchor(path, filename, TRUE); for (tp = Contents; tp != NULL; tp = tp -> next) { fprintf(fp, "%c%s\t%s%s\t%ld-%ld\tFrom %s: %s\n", HASH(tp -> anchor), tp -> anchor, path, body_ext, tp -> start, end, tp -> author, tp -> subject); end = tp -> start; } } } /* Print the Table of Contents */ void PrintContents(contents_t * Contents, FILE * fp) { if (Contents != NULL) { contents_t *tp; fprintf(fp, "\n"); fprintf(fp, "<H1>Table of Contents</H1>\n<DL>\n"); for (tp = Contents; tp != NULL; tp = tp -> next) { fprintf(fp, "<DT><A HREF=\"/MessageID/%s\">", tp -> anchor); if (tp -> isNews) fprintf(fp, "%s in %s", tp -> author, tp -> group); else fprintf(fp, "%s from %s", "Mail", tp -> author); fprintf(fp, "</A><DD>%s\n", tp -> subject); } fprintf(fp, "</DL><P>\n\n"); } } /* Strip trailing white space */ char * StripTail(char *line) { char *tcp = line + strlen(line) - 1; while (*tcp == '\r' || *tcp == '\n' || *tcp == ' ' || *tcp == '\t') *tcp-- = '\0'; return line; } /* Strip trailing white space and move to first non-white character */ static char * HTStrip(char *line) { char *tcp; for (tcp = StripTail(line); ISWHITE(*tcp); tcp++) /* loop */ ; return tcp; } /* Rewind input and copy to output stream */ static void CatStream(FILE * infp, FILE * outfp) { register int ch; if (infp) { fflush(infp); rewind(infp); while ((ch = getc(infp)) != EOF) putc((char) ch, outfp); } } /* Decode HTML reference */ static char * DecodeAnchor(char *buf, const unsigned char *anchor) { char *tp1 = buf; const char *tp2 = anchor; while (*tp2) { if (*tp2 == '%') { char *tcp; unsigned ch = 0; if ((tcp = strchr(HEX, *++tp2)) != NULL) ch = (tcp - HEX) << 4; if ((tcp = strchr(HEX, *++tp2)) != NULL) ch += tcp - HEX; *tp1 = (char)ch; tp2++; } else *tp1++ = *tp2++; } *tp1 = '\0'; return buf; } /* In "XXX <YYY> ZZZZ" return "YYY" */ static char * MessageKey(char *buf, char *line) { char *tp1; char *tp2; if ((tp1 = strchr(line, '<')) != NULL) { if ((tp2 = strchr(++tp1, '>')) != NULL) *tp2 = '\0'; } else tp1 = line; /* Message Keys are CASE INSENSITIVE */ return EncodeAnchor(buf, HTStrip(tp1), FALSE); } /*- * Find Author's name in mail address * In "XXX (YYY)" or YYY <XXX>" return "YYY" * Find Author's address in mail address * In "XXX (YYY)" or YYY <XXX>" return "XXX" */ static char * NameKey(char *buf, const char *key, int author) { char *s, *e; char email[256]; char p1, p2, b1, b2; if (author) { p1 = '('; p2 = ')'; b1 = '<'; b2 = '>'; } else { p1 = '<'; p2 = '>'; b1 = '('; b2 = ')'; } strcpy(email, key); if (((s = strchr(email, p1)) != NULL) && ((e = strchr(email, p2)) != NULL)) { if (e > s) { *e = '\0'; /* Chop off everything after p2 (')' or '>') */ strcpy(email, s + 1); } } else if (((s = strchr(email, b1)) != NULL) && ((e = strchr(email, b2)) != NULL)) { if (e > s) strcpy(s, e + 1); /* Remove <...> or (...) */ } strcpy(buf, HTStrip(email)); /* Remove leading and trailing spaces */ return buf; } /* * This Function returns a static storage area, it is the duty of the caller * to save it. */ static char * Anchor(char *line) { char *tp1; char *tp2 = line; static char tmp[BUFSIZ]; tmp[0] = '\0'; while (tp2 != NULL && (tp1 = strchr(tp2, '<')) != NULL) { if (tp1 > tp2) { *tp1 = '\0'; strcat(tmp, tp2); } if ((tp2 = strchr(++tp1, '>')) != NULL) *tp2++ = '\0'; tp1 = HTStrip(tp1); strcat(tmp, "<A HREF=\"/MessageID/"); EncodeAnchor(tmp + strlen(tmp), tp1, FALSE); sprintf(tmp + strlen(tmp), "\">%s</A>", tp1); } /* while */ if (tp2 != NULL) strcat(tmp, tp2); return tmp; } /*- * Send out a line of the message body. * (1) Use Latin-1 public entities * (2) Translate URLs, eg. ftp://site:port/path to: * <A HREF="ftp/site:port/path">ftp/site:port/path</A> * (3) Markup VT100 Style underlined text (eg. man) as * Strong. * We don't confirm that the protocol is valid (registered) * --- would be very simple but the list is growing too fast * (file, ftp, http, wais, gopher, prospero, ... ) */ static char * BodyLine(char *outbuf, unsigned char *line, int nl) { unsigned char *tcp = outbuf; unsigned char *tp = line; unsigned char ch; while (( ch = *tp) != '\0') if (ch == '_' && *(tp+1) == '\b') { char buf[256]; char tmp[256]; char *ptr = buf; /* VT100 Underlined text */ do { tp += 2; /* Skip _<Ctrl-H> */ *ptr++ = *tp++; } while (*tp == '_' && *(tp+1) == '\b'); *ptr = '\0'; ptr = BodyLine(tmp, buf, FALSE); /* Fixup chars */ /* Underlined Text is marked strong */ strcpy(tcp, "<strong>"); strcat(tcp, ptr); strcat(tcp, "</strong>"); tcp += strlen(tcp); } else if ((ch == ':') && (*(tp+1) == '/') && (*(tp+2) == '/') && (tp >= &line[2]) && ISALPHA(*(tp-1))) { unsigned char *tp2; /* Saw a URL Magic Back up */ do { --tp; --tcp; } while (ISALPHA(*tp) && tp >= line); if (!ISALPHA(*tp)) { tp++; tcp++; } /* Insert Anchor */ for (tp2 = tp; ISPATH(*tp2); tp2++) /* loop */ ; if (( *(tcp-1) == '"' || *(tcp-1) == '\'') && *tp2 == *(tp-1)) { /* quoted arguments */ *tp2++ = '\0'; /* ASCIIZ */ sprintf(--tcp, "<A HREF=\"%s\">%s%c</A>", tp, tp - 1, *(tp-1)); } else { char ch; ch = *tp2; /* Save character */ *tp2 = '\0'; /* ASCIIZ */ sprintf(tcp, "<A HREF=\"%s\">%s</A>", tp, tp); *tp2 = ch; /* Replace character */ } tp = tp2; /* Set pointer to tail */ tcp += strlen(tcp); /* Go to tail */ } else if (Markups[ch].len) { memcpy(tcp, Markups[ch].entity, Markups[ch].len); tcp += Markups[ch].len; tp++; } else if (!ISASCII(ch) || (ISCNTRL(ch) && !ISWHITE(ch))) { *tcp++ = '&'; *tcp++ = '#'; *tcp++ = (unsigned char) ((TOISO(ch) / 100) + '0'); *tcp++ = (unsigned char) ((TOISO(ch) % 100) / 10 + '0'); *tcp++ = (unsigned char) ((TOISO(ch) % 10) + '0'); *tcp++ = ';'; tp++; } else *tcp++ = *tp++; if (nl) *tcp++ = '\n'; *tcp = '\0'; return outbuf; } /* Put a line of the Body */ static void PutBodyLine(unsigned char *line, FILE * outfp) { unsigned char outbuf[BUFSIZ]; fputs(BodyLine(outbuf, line, TRUE), outfp); } /* Headers should be 7 bit */ static char * HeaderLine(char *outbuf, unsigned char *line) { /* For now pretend we have 8 bit headers (latter RFC1342) */ return BodyLine(outbuf, line, FALSE); } /*- * IsMailFromLine - Is this a legal unix mail "From " line? * * Given a line of input will check to see if it matches the standard * unix mail "from " header format. Returns 0 if it does and <0 if not. * * 2 - Very strict, also checks that each field contains a legal value. * * Assumptions: Not having the definitive unix mailbox reference I have * assumed that unix mailbox headers follow this format: * * From <person> <date> <garbage> * * Where <person> is the address of the sender, being an ordinary * string with no white space imbedded in it, and <date> is the date of * posting, in ctime(3C) format. * * This would, on the face of it, seem valid. I (Bernd) have yet to find a * unix mailbox header which doesn't follow this format. * * From: Bernd Wechner (bernd@bhpcpd.kembla.oz.au) * Obfuscated by: KFS (as usual) */ static int IsMailFromLine(char *line) { #define MAX_FIELDS 10 char *fields[MAX_FIELDS]; char *sender_tail; register char *lp, **fp; register int n, i; const char legal_day[] = "SunMonTueWedThuFriSat"; const char legal_month[] = "JanFebMarAprMayJunJulAugSepOctNovDec"; const int legal_numbers[] = {1, 31, 0, 23, 0, 59, 0, 60, 1969, 2199}; if (strncmp(line, "From ", 5)) return -100; lp = line + 5; /* sender day mon dd hh:mm:ss year */ for (n = 0, fp = fields; n < MAX_FIELDS; n++) { while (*lp && *lp != '\n' && ISASCII(*lp) && ISWHITE(*lp)) lp++; if (*lp == '\0' || *lp == '\n') break; *fp++ = lp; while (*lp && ISASCII(*lp) && !ISWHITE(*lp)) if (*lp++ == ':' && (n == 4 || n == 5)) break; if (n == 0) sender_tail = lp; } if (n < 8) return -200 - n; fp = fields; if (n > 8 && !ISNUM(fp[7][0])) fp[7] = fp[8]; /* ... TZ year */ if (n > 9 && !ISNUM(fp[7][0])) fp[7] = fp[9]; /* ... TZ DST year */ fp++; for (i = 0; i < 21; i += 3) if (strncmp(*fp, &legal_day[i], 3) == 0) break; if (i == 21) return -1; fp++; for (i = 0; i < 36; i += 3) if (strncmp(*fp, &legal_month[i], 3) == 0) break; if (i == 36) return -2; for (i = 0; i < 10; i += 2) { lp = *++fp; if (!ISNUM(*lp)) return -20 - i; n = atoi(lp); if (n < legal_numbers[i] || legal_numbers[i + 1] < n) return -10 - i; } return 0; } /*- * Start of News: * "Article <Number> of <Newsgroup>:" */ static int IsNewsLine(char *line, int *article, char **group) { int i; if (strncmp(line, "Article ", 8)) return -500; line += 8; /* Skip white space */ while (ISWHITE(*line)) line++; if (!ISNUM(*line)) return -400; i = atoi(line); /* skip number data */ while (ISNUM(*line)) line++; if (!ISWHITE(*line)) return -300; /* Skip white space */ while (ISWHITE(*line)) line++; if (line[0] != 'o' || line[1] != 'f') return -200; /* Skip the of */ line += 2; if (!ISWHITE(*line)) return -100; /* Skip white space */ while (ISWHITE(*line)) line++; if (*line == '\0') return -10; /* Missing Group */ /* OK, if was "Article NNN of XXX.XXX.XXXXX:" */ { static char grp[60]; char tmp[126]; size_t tail; strncpy(grp, HeaderLine(tmp, line), sizeof(grp) - 1); grp[sizeof(grp)] = '\0'; /* Strip trailing ':' if it has one */ if (grp[tail = strlen(grp) - 1] == ':') grp[tail] = '\0'; if (group) *group = grp; if (article) *article = i; } return 0; } /* Structure to store the header information (envelope) */ typedef struct { char cc[128]; /* cc: */ char bcc[128]; /* bcc: */ char from[64]; /* Reply-To: or From: */ char address[64]; char subject[128]; /* Subject: */ char date[40]; /* Date: */ char id[80]; /* Message-ID: */ char keywords[256]; /* Keywords: */ char organization[80]; /* Organization: */ char followup[126]; /* Followup-To: */ char newsgroups[BUFSIZ]; /* Newgroups: */ char xrefs[BUFSIZ / 2]; /* In-Reply-To: */ char refs[BUFSIZ]; /* References: */ } envelope_t; static char * ReadHeaderLine(char *buf, size_t len, FILE *infp) { char *tcp; int ahead; /* lookahead token */ if ((tcp = fgets(buf, len, infp)) != NULL) { /* Check if continuation line */ while ((ahead = fgetc(infp)) == '\t' || ahead == ' ') { char tmp[256]; if (fgets(tmp, sizeof(tmp), infp) != NULL) { tcp = StripTail(buf); strcat(tcp, " "); strcat(tcp, tmp); } } /* while */ ungetc(ahead, infp); /* push back */ } return StripTail(tcp); } static int ParseRFC822Header(envelope_t * envelope, FILE * infp) { char tmp[BUFSIZ]; char *tcp; int mime = 0; memset(envelope, 0, sizeof(envelope_t)); /* Read the header bits */ /* Everything after first null line is message body */ while ((tcp = ReadHeaderLine(tmp, sizeof(tmp), infp)) != NULL && *tcp) { /* TAGS in RFC-822 Header */ switch (*tcp++) { case 'b': case 'B': /* possible bcc: */ if (strncasecomp("cc: ", tcp, 4) == 0) HeaderLine(envelope -> bcc, tcp + 4); break; case 'c': case 'C': /* possible cc: or Content-<*>: */ /* MIME NOT YET SUPPORTED */ if (strncasecomp("ontent-", tcp, 7) == 0) mime++; else if (strncasecomp("c: ", tcp, 3) == 0) HeaderLine(envelope -> cc, tcp + 3); break; case 'd': case 'D': /* possible Date: */ if (strncasecomp("ate: ", tcp, 5) == 0) HeaderLine(envelope -> date, tcp + 5); break; case 'f': case 'F': /* possible From: or Followup-To: */ /* "Reply-to" SUPERSEDES the "From" field */ if (*(envelope -> from) == '\0' && strncasecomp("rom: ", tcp, 5) == 0) { NameKey(envelope -> address, tcp + 5, FALSE); NameKey(envelope -> from, tcp + 5, TRUE); } else if (strncasecomp("ollowup-To: ", tcp, 12) == 0) strcpy(envelope -> followup, tcp + 12); break; case 'i': case 'I': /* possible In-Reply-To: */ if (strncasecomp("n-Reply-To: ", tcp, 12) == 0) strcpy(envelope -> xrefs, tcp + 12); break; case 'k': case 'K': /* possible Keywords: */ if (strncasecomp("eywords: ", tcp, 9) == 0) HeaderLine(envelope -> keywords, tcp + 9); break; case 'm': case 'M': /* possible Message-ID: or MIME-Version: */ if (strncasecomp("essage-ID: ", tcp, 11) == 0) MessageKey(envelope -> id, tcp + 11); else if (strncasecomp("IME-Version: ", tcp, 13) == 0) mime++; break; case 'n': case 'N': /* possible Newsgroups: */ if (strncasecomp("ewsgroups: ", tcp, 11) == 0) HeaderLine(envelope -> newsgroups, tcp + 11); break; case 'o': case 'O': /* possible Organization: */ if (strncasecomp("rganization: ", tcp, 13) == 0) HeaderLine(envelope -> organization, tcp + 13); break; case 'r': case 'R': /* possible Reply-To: or References: */ if (strncasecomp("eply-To: ", tcp, 9) == 0) { NameKey(envelope -> address, tcp + 9, FALSE); NameKey(envelope -> from, tcp + 9, TRUE); } else if (strncasecomp("eferences: ", tcp, 11) == 0) strcpy(envelope -> refs, tcp + 11); break; case 's': case 'S': /* possible Subject: or Sender: */ if (strncasecomp("ubject: ", tcp, 8) == 0) HeaderLine(envelope -> subject, tcp + 8); break; } /* switch */ } if (*(envelope -> id) == '\0') { static int count = 1; /* generate psuedo ID */ sprintf(envelope -> id, "FAKE%05o", count++); } if (*(envelope -> subject) == '\0') strcpy(envelope -> subject, "No Subject"); return mime; } static void LocateAnchors(contents_t ** Contents, int isNews, char *group, FILE * infp, FILE * outfp) { int mime; envelope_t envelope; static const char item[] = "<DT>%s:<DD>%s\n"; #define xItem(s, n, v) if (s) fprintf(outfp, item, n, v); #define Item(n, v) xItem(*(v), (n), (v)) mime = ParseRFC822Header(&envelope, infp); /* Add Message To Table of Contents */ AddMessage(Contents ,ftell(outfp) ,isNews ,envelope.id ,envelope.subject ,envelope.from[0] ? envelope.from : "Annonymous" ,group); /* Print Header */ fprintf(outfp, "\n<DL>\n");; Item("Subject", envelope.subject); Item("From", envelope.from); if (envelope.address[0]) fprintf(outfp, "<DT>Reply to:<DD><ADDRESS>%s</ADDRESS>\n", envelope.address); Item("Organization", envelope.organization); Item("Date", envelope.date); xItem(envelope.xrefs[0], "In-Reply-To", Anchor(envelope.xrefs)); xItem(envelope.refs[0], "References", Anchor(envelope.refs)); Item("Followup-To", envelope.followup); Item("cc", envelope.cc); Item("bcc", envelope.bcc); Item("Keywords", envelope.keywords); if (isNews) fprintf(outfp, "<DT>Usenet %s:<DD>Article %d\n", group, isNews); xItem((envelope.newsgroups[0]) && (!isNews || strcmp(group, envelope.newsgroups)), "Cross Posted Newsgroups", envelope.newsgroups); fprintf(outfp, "</DL>\n"); /* If we saw a MIME Header send out a warning message */ if (mime) fprintf(outfp, "\ <P><STRONG>WARNING: The message contained a MIME header (NOT YET Supported)</STRONG><P>\n"); #undef Item #undef xItem } int ParseMail(contents_t ** Contents, FILE * infp, FILE * outfp) { char tmp[BUFSIZ]; int count = 0; int lines = 0; int isNews; char *group; while (fgets(tmp, sizeof(tmp), infp) != NULL) { StripTail(tmp); if ((isNews = IsMailFromLine(tmp)) == 0 || IsNewsLine(tmp, &isNews, &group) == 0) { lines = 0; /* Mail header */ if (count++) fprintf(outfp, "</PRE>\n"); LocateAnchors(Contents, isNews, group, infp, outfp); } else { if (lines++ == 0) fprintf(outfp, "<PRE>\n\n"); PutBodyLine(tmp, outfp); /* Body */ } } /* while */ if (lines) fprintf(outfp, "</PRE>"); return count; } static void Fatal(int code, const char *message) { /* Send the error message to stdout */ printf("<TITLE>Mail Server Error Message</TITLE>\n\ <PLAINTEXT>\n\n%s\n\n", message); exit(code); } static const char * basename(char *string) { const char *tcp; for (tcp = string + strlen(string); *tcp != '/' && tcp > string; tcp--) /* loop */ ; return (*tcp == '/') ? ++tcp : tcp; } static int SendDocument(const char *title, const char *filename, long start, long end) { long length = end - start; int ch; FILE *fp; if ((fp = fopen(filename, "r")) == NULL) return -1; if (fseek(fp, start, 0) == -1) return -1; /* seek error */ /* Produce HTML Document */ fputs("<HTML>\n<HEAD>\n<TITLE>", stdout); fputs(title, stdout); fputs("</TITLE>\n</HEAD>\n<BODY>\n", stdout); fputs("\n", stdout); while ((ch = getc(fp)) != EOF && length--) putc((char) ch, stdout); fputs("</BODY></HTML>\n", stdout); return 0; } int FetchMessage(const char *name) { FILE *fp; char tmp[BUFSIZ]; const char hash = HASH(name); int result = -1; if ((fp = fopen(IndexFile, "r")) != NULL) while (fgets(tmp, sizeof(tmp), fp) != NULL) if (tmp[0] == hash) { char *anchor; char *filename; char *title; char *range; anchor = strtok(tmp+1, "\t"); filename = strtok(NULL, "\t"); range = strtok(NULL, "\t"); title = strtok(NULL, "\n"); if (anchor && filename && range && strcmp(anchor, name) == 0) { long start, end; if (sscanf(range, "%ld-%ld", &start, &end) != 2) continue; result = SendDocument( title ? title : anchor, DecodeAnchor(tmp, filename), start, end ); break; } } fclose(fp); return result; } static FILE * OpenContentsFile(const char *name, const char *mode) { char tmp[MAXPATHLEN]; strcpy(tmp, name); strcat(tmp, body_ext); return fopen(tmp, mode); } int main(int argc, char **argv) { FILE *outfp; FILE *fp; contents_t *Contents = NULL; char filename[MAXPATHLEN]; InitCharTable(); if (argc == 2) { DecodeAnchor(filename, argv[1]); if (strncmp(filename, "/MessageID/", 11) == 0) { if (FetchMessage(filename + 11)) Fatal(E_ENOENT); return 0; } else { char tmp[MAXPATHLEN]; struct stat statbuf1; struct stat statbuf2; /* Check if $1.html exists */ strcpy(tmp, filename); strcat(tmp, DIRECTORY_EXTENSION); if (stat(filename, &statbuf1) == 0 && stat(tmp, &statbuf2) == 0 && statbuf2.st_mtime > statbuf1.st_mtime && statbuf2.st_size > 200) { /* The cache exists and its newer */ if ((outfp = fopen(tmp, "r")) != NULL) { CatStream(outfp, stdout); /* Cat the cache */ fclose(outfp); return 0; /* DONE */ } else outfp = stdout; /* Don't bother with caching */ } else if ((outfp = fopen(tmp, "w+")) == NULL) outfp = stdout; } } else Fatal(E_USAGE); /* Open Input and parse to build Messages file */ if ((fp = OpenContentsFile(filename, "w")) != NULL) { FILE *infp; if ((infp = fopen(filename, "r")) == NULL) Fatal(E_ENOENT); ParseMail(&Contents, infp, fp); fclose(infp); fflush(fp); DumpDictionary(Contents, filename, ftell(fp)); fclose(fp); } else Fatal(E_NOTEMP); /* Produce HTML Entry Document */ fputs("\n", outfp); fprintf(outfp, "<HTML>\n<HEAD>\n<TITLE>%s</TITLE>\n</HEAD>\n<BODY>\n", basename(filename)); if (Contents) PrintContents(Contents, outfp); else /* Was an error, so just cat the contents to recover */ CatStream(OpenContentsFile(filename, "r"), outfp); fputs("</BODY></HTML>\n", outfp); if (fileno(outfp) != fileno(stdout)) CatStream(outfp, stdout); /* Cat File to stdout */ return 0; }